INTRODUCTION TO SPATIAL STATISTICS IN R

library(tidyverse)
library(broom)
library(janitor)
library(gt)
library(gtsummary)
theme_set(theme_bw())
library(sf)
library(terra)
philly_sf <- read_rds("philadelphia_tracts.rds")
st_crs(philly_sf)
Coordinate Reference System:
  User input: EPSG:4326 
  wkt:
GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        MEMBER["World Geodetic System 1984 (G2296)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[
        SCOPE["Horizontal component of 3D system."],
        AREA["World."],
        BBOX[-90,-180,90,180]],
    ID["EPSG",4326]]
st_crs(philly_sf) <- sf::st_crs(4326)

Polygon Data

pt_sf <- philly_sf
class(pt_sf)
[1] "sf"         "data.frame"
glimpse(philly_sf)
Rows: 384
Columns: 15
$ OBJECTID   <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
$ STATEFP10  <fct> 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,…
$ COUNTYFP10 <fct> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,…
$ TRACTCE10  <fct> 009400, 009500, 009600, 013800, 013900, 014000, 014100, 014…
$ GEOID10    <fct> 42101009400, 42101009500, 42101009600, 42101013800, 4210101…
$ NAME10     <fct> 94, 95, 96, 138, 139, 140, 141, 142, 143, 144, 145, 146, 14…
$ NAMELSAD10 <fct> Census Tract 94, Census Tract 95, Census Tract 96, Census T…
$ MTFCC10    <fct> G5020, G5020, G5020, G5020, G5020, G5020, G5020, G5020, G50…
$ FUNCSTAT10 <fct> S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S,…
$ ALAND10    <int> 366717, 319070, 405273, 341256, 562934, 439802, 562132, 789…
$ AWATER10   <int> 0, 0, 0, 0, 0, 0, 0, 277434, 282808, 0, 0, 0, 0, 0, 0, 0, 0…
$ INTPTLAT10 <fct> +39.9632709, +39.9658709, +39.9655396, +39.9764504, +39.975…
$ INTPTLON10 <fct> -075.2322437, -075.2379140, -075.2435075, -075.1771771, -07…
$ LOGRECNO   <fct> 10429, 10430, 10431, 10468, 10469, 10470, 10471, 10472, 104…
$ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((-75.22927 3..., MULTIPOLYGON (…
dim(philly_sf)
[1] 384  15
head(philly_sf)
OBJECTID STATEFP10 COUNTYFP10 TRACTCE10 GEOID10 NAME10 NAMELSAD10 MTFCC10 FUNCSTAT10 ALAND10 AWATER10 INTPTLAT10 INTPTLON10 LOGRECNO geometry
0 1 42 101 009400 42101009400 94 Census Tract 94 G5020 S 366717 0 +39.9632709 -075.2322437 10429 MULTIPOLYGON (((-75.22927 3…
1 2 42 101 009500 42101009500 95 Census Tract 95 G5020 S 319070 0 +39.9658709 -075.2379140 10430 MULTIPOLYGON (((-75.23536 3…
2 3 42 101 009600 42101009600 96 Census Tract 96 G5020 S 405273 0 +39.9655396 -075.2435075 10431 MULTIPOLYGON (((-75.24343 3…
3 4 42 101 013800 42101013800 138 Census Tract 138 G5020 S 341256 0 +39.9764504 -075.1771771 10468 MULTIPOLYGON (((-75.17341 3…
4 5 42 101 013900 42101013900 139 Census Tract 139 G5020 S 562934 0 +39.9750563 -075.1711846 10469 MULTIPOLYGON (((-75.17313 3…
5 6 42 101 014000 42101014000 140 Census Tract 140 G5020 S 439802 0 +39.9735358 -075.1630966 10470 MULTIPOLYGON (((-75.16141 3…
## extract just spatial attributes
st_geometry(philly_sf)
Geometry set for 384 features 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
Geodetic CRS:  WGS 84
First 5 geometries:
philly_geo <- st_geometry(philly_sf)
philly_geo[[1]]
ggplot(philly_sf) +
  geom_sf(fill = "lightblue", color = "white") +
  theme_minimal() +
  labs(title = "Philadelphia Census Tracts")

Line Data

bike_lanes <- st_read("Bike_Network.shp")
Reading layer `Bike_Network' from data source 
  `C:\Users\KAsab\Desktop\SPATIAL STATISTICS\Bike_Network.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 5101 features and 8 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: -75.26927 ymin: 39.87651 xmax: -74.96572 ymax: 40.124
Geodetic CRS:  WGS 84
ggplot(bike_lanes) +
  geom_sf(color = "steelblue", size = 0.3) +
  theme_minimal() +
  labs(title = "Philadelphia Bike Network")

ggplot() +
  geom_sf(data = philly_sf, fill = "gray95", color = "white") +
  geom_sf(data = bike_lanes, color = "steelblue", size = 0.3) +
  theme_minimal() +
  labs(title = "Bike Lanes Overlaid on Philadelphia Census Tracts")

st_geometry(bike_lanes)
Geometry set for 5101 features 
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: -75.26927 ymin: 39.87651 xmax: -74.96572 ymax: 40.124
Geodetic CRS:  WGS 84
First 5 geometries:

Points Data

philly_crimes <- read_csv("philly_crimes.csv")
class(philly_crimes)
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
philly_crimes <- philly_crimes %>%
  filter(!is.na(lat), !is.na(lng)) %>%
  filter(lat > 38 & lat < 41,    # Philadelphia bounding box
         lng < -70 & lng > -76)  # longitude should be west of 0
crimes2 <-read_csv("crime2.csv")
class(crimes2)
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
crimes <-  st_as_sf(crimes2,
                    coords = c("Lon","Lat"),
                    crs = 4326)
library(sf)

# Convert to sf object
philly_crimes_sf <- st_as_sf(philly_crimes, 
                              coords = c("lng","lat"),  
                              crs = 4326)  # WGS 84
# st_geometry_type(philly_crimes_sf)
st_crs(philly_crimes_sf)
Coordinate Reference System:
  User input: EPSG:4326 
  wkt:
GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        MEMBER["World Geodetic System 1984 (G2296)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[
        SCOPE["Horizontal component of 3D system."],
        AREA["World."],
        BBOX[-90,-180,90,180]],
    ID["EPSG",4326]]
class(crimes)
[1] "sf"         "tbl_df"     "tbl"        "data.frame"
ggplot() +
  geom_sf(data = philly_sf, fill = "gray95", color = "white") +
  geom_sf(data = philly_crimes_sf, color = "red", alpha = 0.4, size = 0.5) +
  theme_minimal() +
  labs(title = "Philadelphia Crime Locations")

# st_geometry_type(philly_crimes_sf)
st_crs(philly_crimes_sf)
Coordinate Reference System:
  User input: EPSG:4326 
  wkt:
GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        MEMBER["World Geodetic System 1984 (G2296)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[
        SCOPE["Horizontal component of 3D system."],
        AREA["World."],
        BBOX[-90,-180,90,180]],
    ID["EPSG",4326]]
plot(st_geometry(philly_crimes_sf))  # base R check

Aggregate Crime Counts by Tract

#For each crime point, find the census tract polygon it falls within, and append the tract attributes to the crime.

joined <- st_join(philly_crimes_sf, philly_sf)  # assigns each point to a tract



crime_counts <- joined %>%
  group_by(GEOID10) %>%  # or appropriate tract ID
  summarise(n = n())


philly_sf <- left_join(
  philly_sf,
  st_drop_geometry(crime_counts),
  by = "GEOID10" 
)


ggplot(philly_sf) +
  geom_sf(aes(fill = n)) +
  scale_fill_viridis_c(na.value = "white") +
  theme_minimal() +
  labs(title = "Crime Counts per Census Tract")

Time-Series or Temporal Filtering

recent_crimes <- philly_crimes_sf %>%
  filter(dispatch_date > as.Date("2025-07-01"))

ggplot() +
  geom_sf(data = philly_sf, fill = "gray90") +
  geom_sf(data = recent_crimes, color = "blue", alpha = 0.5, size = 0.3) +
  labs(title = "Crimes Since July 2025")+
  theme_bw()

ggplot() +
  geom_sf(data = philly_sf, fill = "gray90", color = NA) +
  stat_density_2d(
    data = st_coordinates(philly_crimes_sf) %>% as.data.frame(),
    aes(x = X, y = Y, fill = after_stat(level)), 
    geom = "polygon", alpha = 0.4
  ) +
  scale_fill_viridis_c() +
  labs(title = "Crime Density Heatmap") +
  theme_minimal()+ 
  theme(axis.title = element_blank())

philly_crimes_sf |> 
  distinct(text_general_code) |> 
  print(n = Inf)
# A tibble: 30 × 1
   text_general_code                      
   <chr>                                  
 1 Robbery No Firearm                     
 2 Aggravated Assault No Firearm          
 3 Aggravated Assault Firearm             
 4 Thefts                                 
 5 Burglary Non-Residential               
 6 Burglary Residential                   
 7 Robbery Firearm                        
 8 Theft from Vehicle                     
 9 Motor Vehicle Theft                    
10 Rape                                   
11 All Other Offenses                     
12 Narcotic / Drug Law Violations         
13 Other Assaults                         
14 Disorderly Conduct                     
15 Arson                                  
16 Fraud                                  
17 Weapon Violations                      
18 Prostitution and Commercialized Vice   
19 Other Sex Offenses (Not Commercialized)
20 Forgery and Counterfeiting             
21 Vandalism/Criminal Mischief            
22 Receiving Stolen Property              
23 DRIVING UNDER THE INFLUENCE            
24 Offenses Against Family and Children   
25 Gambling Violations                    
26 Liquor Law Violations                  
27 Embezzlement                           
28 Public Drunkenness                     
29 Vagrancy/Loitering                     
30 Homicide - Criminal                    
homicide <- philly_crimes_sf |> 
  filter(text_general_code == "Homicide - Criminal")

fraud <- philly_crimes_sf |> 
  filter(text_general_code == "Fraud")
  ggplot()+
     geom_sf(data = philly_sf, fill = "gray95", color = "white")+
  geom_sf(data = homicide, color = "red")+
    geom_sf(data = fraud, color = "blue")+
    theme_minimal()

Rasta Dataset

library(datasets)
class(volcano)
[1] "matrix" "array" 
volcano <- volcano
library(raster)

volcano_raster <- raster(volcano)
plot(volcano_raster, main = "Volcano Raster (raster pkg)")

library(terra)

volcano_terra <- rast(volcano)
plot(volcano_terra, main = "Volcano Raster (terra pkg)")

library(raster)
library(ggplot2)

# 1. Convert matrix to raster
volcano_raster <- raster(volcano)

# 2. Convert to data frame
volcano_df <- as.data.frame(volcano_raster, xy = TRUE)
names(volcano_df)[3] <- "elevation"  # Rename layer column

# 3. Plot with ggplot2
ggplot(volcano_df, aes(x = x, y = y, fill = elevation)) +
  geom_raster() +
  scale_fill_viridis_c() +
  coord_equal() +
  theme_minimal() +
  labs(title = "Volcano Elevation Map", fill = "Elevation")

library(terra)
volcano_rast <- rast(volcano)
volcano_df <- as.data.frame(volcano_rast, xy = TRUE)
names(volcano_df)[3] <- "elevation"

ggplot(volcano_df, aes(x = x, y = y, fill = elevation)) +
  geom_raster() +
  scale_fill_viridis_c() +
  coord_equal() +
  theme_minimal()+
  labs(title = "Volcano Elevation Map", fill = "Elevation")

library(ggplot2)

# Convert volcano matrix to data frame
volcano_df <- as.data.frame(as.table(volcano))
names(volcano_df) <- c("y", "x", "elevation")

# Create plot with raster and contour lines
ggplot(volcano_df, aes(x = x, y = y, z = elevation)) +
  geom_raster(aes(fill = elevation)) +
  geom_contour(color = "black", size = 0.3) +
  scale_fill_viridis_c(name = "Elevation") +
  labs(title = "Volcano Elevation Map with Contours") +
  theme_minimal()

Spatial Hypothesis Testing

nyc <- st_read("Counties.shp")
Reading layer `Counties' from data source 
  `C:\Users\KAsab\Desktop\SPATIAL STATISTICS\Counties.shp' using driver `ESRI Shapefile'
Simple feature collection with 62 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 105571.4 ymin: 4480943 xmax: 779932.1 ymax: 4985476
Projected CRS: NAD83 / UTM zone 18N
nys_lyme_data <- read_rds("nys_lyme_data.rds")
lyme <- nys_lyme_data
class(nys_lyme_data)
[1] "sf"         "data.frame"
ggplot()+
  geom_sf(data = nyc,color = "black",fill = "gray95")+
  theme_minimal()+
  labs(title = 'New York City Counties')

ggplot()+
  geom_sf(data = nys_lyme_data,color = "steelblue",fill = "gray95")+
  theme_minimal()+
  labs(title = 'New York State Lyme Disease Data')

st_geometry((nys_lyme_data))
Geometry set for 62 features 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 105571.4 ymin: 4480943 xmax: 779932.1 ymax: 4985476
Projected CRS: NAD83 / UTM zone 18N
First 5 geometries:
library(tmap)
tmap_mode("view") # set mode to interactive
  tm_shape(nys_lyme_data)+ # specify sf object with geographic attribute of interest
    tm_polygons("Lyme.Incidence.Rate") # specify column with value of interest
# St lawrence has missing values for ithe incidence rate so you remove it
nys_lyme_data <- nys_lyme_data |> 
  filter(!is.na(Lyme.Incidence.Rate))
tmap_mode("view")
tm_shape(nys_lyme_data)+
  tm_polygons("Lyme.Incidence.Rate")

Global Clustering (Moran’s I)

summary(nys_lyme_data[["Lyme.Incidence.Rate"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.90   13.50   36.30   80.27   98.50  599.60 
nys_lyme_data |> 
  ggplot(aes(x = Lyme.Incidence.Rate))+
  geom_histogram(color = "white")

nys_lyme_data |> 
  ggplot(aes(x = Lyme.Incidence.Rate))+
  geom_boxplot()

nys_lyme_data <-  nys_lyme_data |> 
  mutate(log_lyme_incidence = log(Lyme.Incidence.Rate))
nys_lyme_data |> 
  ggplot(aes(x = log_lyme_incidence))+
  geom_histogram(color = "white",bins = 20)

nys_lyme_data |> 
  ggplot(aes(x = log_lyme_incidence))+
  geom_boxplot()

tmap_mode("view")
tm_shape(nys_lyme_data)+
  tm_polygons("log_lyme_incidence")

Define Neighbouring Polygons

library(spdep)

## create nb object from lyme dataset
lyme_nb <- poly2nb(nys_lyme_data,queen = TRUE) # queen case
# poly2nb is for finding contiguinity neighbors
class(lyme_nb)
[1] "nb"
str(lyme_nb)
List of 61
 $ : int [1:6] 11 20 42 45 46 47
 $ : int [1:4] 5 26 50 60
 $ : int [1:4] 30 31 41 59
 $ : int [1:4] 9 12 13 53
 $ : int [1:4] 2 7 15 60
 $ : int [1:7] 12 23 34 38 49 54 58
 $ : int [1:2] 5 15
 $ : int [1:4] 48 50 53 54
 $ : int [1:5] 4 12 13 27 39
 $ : int [1:2] 16 17
 $ : int [1:5] 1 14 20 42 55
 $ : int [1:7] 4 6 9 27 34 53 54
 $ : int [1:7] 4 9 20 39 47 52 55
 $ : int [1:4] 11 36 40 55
 $ : int [1:5] 5 7 19 32 60
 $ : int [1:5] 10 17 21 56 57
 $ : int [1:3] 10 16 21
 $ : int [1:4] 21 22 29 45
 $ : int [1:6] 15 26 28 32 37 60
 $ : int [1:6] 1 11 13 42 47 55
 $ : int [1:6] 16 17 18 22 45 56
 $ : int [1:6] 18 21 25 29 33 39
 $ : int [1:3] 6 25 38
 $ : int [1:3] 31 41 43
 $ : int [1:4] 22 23 33 38
 $ : int [1:6] 2 19 28 35 50 60
 $ : int [1:6] 9 12 33 34 38 39
 $ : int [1:5] 19 26 35 37 58
 $ : int [1:6] 18 22 39 45 46 47
 $ : int [1:4] 3 41 51 59
 $ : int [1:3] 3 24 41
 $ : int [1:3] 15 19 37
 $ : int [1:5] 22 25 27 38 39
 $ : int [1:4] 6 12 27 38
 $ : int [1:6] 26 28 49 50 58 61
 $ : int [1:5] 14 40 44 52 55
 $ : int [1:3] 19 28 32
 $ : int [1:6] 6 23 25 27 33 34
 $ : int [1:7] 9 13 22 27 29 33 47
 $ : int [1:4] 14 36 44 59
 $ : int [1:5] 3 24 30 31 43
 $ : int [1:5] 1 11 20 45 57
 $ : int [1:2] 24 41
 $ : int [1:3] 36 40 59
 $ : int [1:8] 1 18 21 29 42 46 56 57
 $ : int [1:4] 1 29 45 47
 $ : int [1:6] 1 13 20 29 39 46
 $ : int [1:5] 8 49 50 54 61
 $ : int [1:6] 6 35 48 54 58 61
 $ : int [1:6] 2 8 26 35 48 61
 $ : int 30
 $ : int [1:3] 13 36 55
 $ : int [1:4] 4 8 12 54
 $ : int [1:6] 6 8 12 48 49 53
 $ : int [1:6] 11 13 14 20 36 52
 $ : int [1:4] 16 21 45 57
 $ : int [1:4] 16 42 45 56
 $ : int [1:4] 6 28 35 49
 $ : int [1:4] 3 30 40 44
 $ : int [1:5] 2 5 15 19 26
 $ : int [1:4] 35 48 49 50
 - attr(*, "class")= chr "nb"
 - attr(*, "region.id")= chr [1:61] "1" "2" "3" "4" ...
 - attr(*, "call")= language poly2nb(pl = nys_lyme_data, queen = TRUE)
 - attr(*, "type")= chr "queen"
 - attr(*, "snap")= num 0.01
 - attr(*, "sym")= logi TRUE
 - attr(*, "ncomp")=List of 2
  ..$ nc     : num 1
  ..$ comp.id: num [1:61] 1 1 1 1 1 1 1 1 1 1 ...
# view neighbors of first polygon
lyme_nb[[1]]
[1] 11 20 42 45 46 47
nys_lyme_data[["NAME"]][1]
[1] "Albany"
nys_lyme_data[["NAME"]][c(11,20,42,45,46,47)]
[1] "Columbia"    "Greene"      "Rensselaer"  "Saratoga"    "Schenectady"
[6] "Schoharie"  

Assigning Weights to neighbors

## calculate weights from nb object, we will specify style = "W" for equal weights
lyme_weights <- nb2listw(lyme_nb, style = "W")

class(lyme_weights)
[1] "listw" "nb"   
str(lyme_weights, max.level = 1)
List of 3
 $ style     : chr "W"
 $ neighbours:List of 61
  ..- attr(*, "class")= chr "nb"
  ..- attr(*, "region.id")= chr [1:61] "1" "2" "3" "4" ...
  ..- attr(*, "call")= language poly2nb(pl = nys_lyme_data, queen = TRUE)
  ..- attr(*, "type")= chr "queen"
  ..- attr(*, "snap")= num 0.01
  ..- attr(*, "sym")= logi TRUE
  ..- attr(*, "ncomp")=List of 2
 $ weights   :List of 61
  ..- attr(*, "mode")= chr "binary"
  ..- attr(*, "W")= logi TRUE
  ..- attr(*, "comp")=List of 1
 - attr(*, "class")= chr [1:2] "listw" "nb"
 - attr(*, "region.id")= chr [1:61] "1" "2" "3" "4" ...
 - attr(*, "call")= language nb2listw(neighbours = lyme_nb, style = "W")
 - attr(*, "zero.policy")= logi FALSE
## The weights of the neighbors for the first polygon (Albany)
## Albany has 6 neighbors

lyme_weights[["weights"]][1]
[[1]]
[1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
lyme[["NAME"]][2]
[1] "Allegany"
lyme_nb[[2]]
[1]  5 26 50 60
lyme_weights[["weights"]][2]
[[1]]
[1] 0.25 0.25 0.25 0.25

Perform Hypothesis Testing

## calculate Moran's I statistic and perform hypothesis testing using "moran.test" (analytical calculation) and "moran.mc" (via montecarlo simulation)

## These functions require that we specify the variable of interest and the list of neighbor weights for each polygon

## The option alternative = "greater" specifies testing foe positive spatial autocorrelation , which is alo the default for these functions

## The "moran.mc" function also requires that we specify the number of simulations with option "nsim"
## Analytical test - quicker computation but sensitive to irregularly distributed polygons

moran.test(nys_lyme_data[["log_lyme_incidence"]],lyme_weights,alternative = "greater")

    Moran I test under randomisation

data:  nys_lyme_data[["log_lyme_incidence"]]  
weights: lyme_weights    

Moran I statistic standard deviate = 8.7379, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
      0.720555645      -0.016666667       0.007118479 
moran.test(nys_lyme_data[["log_lyme_incidence"]],lyme_weights,alternative = "greater") |> 
  tidy()
estimate1 estimate2 estimate3 statistic p.value method alternative
0.7205556 -0.0166667 0.0071185 8.737856 0 Moran I test under randomisation greater
## Monte carlo simulation is slower but the preferred method for an accurate p-value

MC <- moran.mc(nys_lyme_data[["log_lyme_incidence"]],lyme_weights,nsim = 999,alternative = "greater")

MC

    Monte-Carlo simulation of Moran I

data:  nys_lyme_data[["log_lyme_incidence"]] 
weights: lyme_weights  
number of simulations + 1: 1000 

statistic = 0.72056, observed rank = 1000, p-value = 0.001
alternative hypothesis: greater
log_incidence_rates <- nys_lyme_data |> pull(log_lyme_incidence)

mc_result <- moran.mc(log_incidence_rates,lyme_weights,nsim = 999,alternative = "greater")

mc_result

    Monte-Carlo simulation of Moran I

data:  log_incidence_rates 
weights: lyme_weights  
number of simulations + 1: 1000 

statistic = 0.72056, observed rank = 1000, p-value = 0.001
alternative hypothesis: greater
names(mc_result)
[1] "statistic"   "parameter"   "p.value"     "alternative" "method"     
[6] "data.name"   "res"        
library(ggplot2)

# Convert to data frame for ggplot
mc_df <- data.frame(simulated_I = mc_result$res)


ggplot(mc_df, aes(x = simulated_I)) +
  geom_histogram(fill = "gray80", color = "white", bins = 30) +
  geom_vline(xintercept = mc_result$statistic, color = "red", size = 1.2) +
  labs(title = "Monte Carlo Simulation of Moran's I",
       x = "Simulated Moran's I",
       y = "Frequency",
       subtitle = paste("Observed Moran's I =", round(mc_result$statistic, 4))) +
  theme_minimal()

ggplot(mc_df, aes(x = simulated_I)) +
  geom_density(fill = "gray80", color = "white") +
  geom_vline(xintercept = mc_result$statistic, color = "red", size = 1.2) +
  labs(title = "Monte Carlo Simulation of Moran's I",
       x = "Simulated Moran's I",
       y = "Frequency",
       subtitle = paste("Observed Moran's I =", round(mc_result$statistic, 4))) +
  theme_minimal()